{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "LlrEjmtJNpuX"
      },
      "source": [
        "# Using CrewAI with Elasticsearch\n",
        "\n",
        "This notebook demonstrates how to use CrewAI with Elasticsearch. This notebook is based on the article [Using CrewAI with Elasticsearch](https://www.elastic.co/search-labs/blog/using-crewai-with-elasticsearch)."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "GNaAN-GNO5qp"
      },
      "source": [
        "## Installing dependencies and importing packages"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "qgclZayCk1Ct",
        "outputId": "f25349ea-412a-411c-9677-517f933fa5a4"
      },
      "outputs": [],
      "source": [
        "# It is suggested to run this script with Python 3.11\n",
        "%pip install elasticsearch==8.17 'crewai[tools]'"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "rAesontNXpLu",
        "outputId": "a951f429-6aa0-48c0-e7d1-aebab0be9e6c"
      },
      "outputs": [],
      "source": [
        "import json\n",
        "import os\n",
        "\n",
        "from getpass import getpass\n",
        "from elasticsearch import Elasticsearch\n",
        "from elasticsearch.helpers import bulk\n",
        "\n",
        "from crewai import Agent, Crew, Task\n",
        "from crewai.tools import tool\n",
        "from crewai_tools import SerperDevTool, WebsiteSearchTool"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "NwOmnk99Pfh3"
      },
      "source": [
        "## Declaring variables"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 339
        },
        "id": "GVKJKfFpPWuj",
        "outputId": "693babf7-0cf0-413d-bfde-4353b3bff938"
      },
      "outputs": [],
      "source": [
        "# Prompt for the endpoint and API keys; getpass does not echo what is typed.\n",
        "os.environ[\"ELASTIC_ENDPOINT\"] = getpass(\"Elastic Endpoint: \")\n",
        "os.environ[\"ELASTIC_API_KEY\"] = getpass(\"Elastic Api Key: \")\n",
        "os.environ[\"SERPER_API_KEY\"] = getpass(\"Serper API Key: \")\n",
        "os.environ[\"OPENAI_API_KEY\"] = getpass(\"OpenAI API Key: \")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "3O2HclcYHEsS"
      },
      "source": [
        "## Instantiate an Elasticsearch client"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "1LWiop8NYiQF"
      },
      "outputs": [],
      "source": [
        "# Shared Elasticsearch client; the endpoint and API key are read from the\n",
        "# environment variables populated in the previous section.\n",
        "_client = Elasticsearch(hosts=os.environ[\"ELASTIC_ENDPOINT\"], api_key=os.environ[\"ELASTIC_API_KEY\"])"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "9lvPHaXjPlfu"
      },
      "source": [
        "## Creating mappings and inference endpoint"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "jInERNQajoNh",
        "outputId": "8b1fb48a-0781-4931-abb1-4d2e8340c647"
      },
      "outputs": [],
      "source": [
        "try:\n",
        "    # Per-request options: 60 s timeout, up to 3 retries, retrying on timeouts too.\n",
        "    _client.options(\n",
        "        request_timeout=60, max_retries=3, retry_on_timeout=True\n",
        "    ).inference.put(\n",
        "        task_type=\"sparse_embedding\",\n",
        "        inference_id=\"clothes-inference\",\n",
        "        body={\n",
        "            \"service\": \"elasticsearch\",\n",
        "            \"service_settings\": {\n",
        "                \"adaptive_allocations\": {\"enabled\": True},\n",
        "                \"num_threads\": 1,\n",
        "                \"model_id\": \".elser_model_2\",\n",
        "            },\n",
        "        },\n",
        "    )\n",
        "\n",
        "    print(\"Inference endpoint created successfully.\")\n",
        "\n",
        "except Exception as e:\n",
        "    # Not every exception has a structured `.info` body (e.g. connection or\n",
        "    # timeout errors), and the body may lack `root_cause`. Fall back to the\n",
        "    # exception text so the handler itself never raises.\n",
        "    try:\n",
        "        reason = e.info[\"error\"][\"root_cause\"][0][\"reason\"]\n",
        "    except (AttributeError, KeyError, IndexError, TypeError):\n",
        "        reason = str(e)\n",
        "    print(f\"Error creating inference endpoint: {reason}\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "*NOTE: After creating the inference endpoint, it is highly recommended to wait 30 seconds for the model to be ready before sending requests.*"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "tc88YzAYw31e",
        "outputId": "d2de37eb-e621-41c4-a1e0-6d14264d303d"
      },
      "outputs": [],
      "source": [
        "try:\n",
        "    # title and description are copied into semantic_field, which is embedded\n",
        "    # through the clothes-inference endpoint.\n",
        "    _client.indices.create(\n",
        "        index=\"summer-clothes\",\n",
        "        body={\n",
        "            \"mappings\": {\n",
        "                \"properties\": {\n",
        "                    \"title\": {\"type\": \"text\", \"copy_to\": \"semantic_field\"},\n",
        "                    \"description\": {\"type\": \"text\", \"copy_to\": \"semantic_field\"},\n",
        "                    \"price\": {\"type\": \"float\"},\n",
        "                    \"semantic_field\": {\n",
        "                        \"type\": \"semantic_text\",\n",
        "                        \"inference_id\": \"clothes-inference\",\n",
        "                    },\n",
        "                }\n",
        "            }\n",
        "        },\n",
        "    )\n",
        "\n",
        "    print(\"Index created successfully.\")\n",
        "\n",
        "except Exception as e:\n",
        "    # Not every exception has a structured `.info` body (e.g. connection or\n",
        "    # timeout errors), and the body may lack `root_cause`. Fall back to the\n",
        "    # exception text so the handler itself never raises.\n",
        "    try:\n",
        "        reason = e.info[\"error\"][\"root_cause\"][0][\"reason\"]\n",
        "    except (AttributeError, KeyError, IndexError, TypeError):\n",
        "        reason = str(e)\n",
        "    print(f\"Error creating index: {reason}\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "G1634zNrv1OS"
      },
      "source": [
        "## Indexing documents"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "LVr6TR8qlw2M"
      },
      "outputs": [],
      "source": [
        "# Sample catalog indexed into summer-clothes; each row is (title, description, price).\n",
        "documents = [\n",
        "    {\"title\": title, \"description\": description, \"price\": price}\n",
        "    for title, description, price in (\n",
        "        (\n",
        "            \"Twist-Detail Crop Top\",\n",
        "            \"Fitted crop top in woven, patterned fabric with linen content. Wide shoulder straps, sweetheart neckline, and gathered side seams for a gently draped effect. Twisted detail at center bust, cut-out section at front, and wide smocking at back. Lined\",\n",
        "            34.99,\n",
        "        ),\n",
        "        (\n",
        "            \"Rib-knit Tank Top\",\n",
        "            \"Short, fitted top in a soft rib knit. Extra-narrow shoulder straps and a square neckline.\",\n",
        "            7.49,\n",
        "        ),\n",
        "        (\n",
        "            \"Linen-blend Shorts\",\n",
        "            \"Shorts in an airy, woven linen blend. High, ruffle-trimmed waist, narrow drawstring and covered elastic at waistband, and discreet side pockets.\",\n",
        "            13.99,\n",
        "        ),\n",
        "        (\n",
        "            \"Twill Cargo Shorts\",\n",
        "            \"Fitted shorts in cotton twill with a V-shaped yoke at front and back. High waist, zip fly with button, and patch front pockets.\",\n",
        "            20.99,\n",
        "        ),\n",
        "        (\n",
        "            \"Slim Fit Ribbed Tank Top\",\n",
        "            \"Slim-fit tank top in medium weight, ribbed cotton-blend jersey with a fitted silhouette. Straight-cut hem.\",\n",
        "            8.49,\n",
        "        ),\n",
        "        (\n",
        "            \"Relaxed Fit Linen Resort Shirt\",\n",
        "            \"Relaxed-fit shirt in airy linen. Resort collar, buttons without placket, yoke at back, and short sleeves. Straight-cut hem. Fabric made from linen is breathable, looks great when ironed or wrinkled, and softens over time.\",\n",
        "            17.99,\n",
        "        ),\n",
        "        (\n",
        "            \"Swim Shorts\",\n",
        "            \"Swim shorts in woven fabric. Drawstring and covered elastic at waistband, side pockets, and a back pocket with hook-loop fastener. Small slit at sides. Mesh liner shorts.\",\n",
        "            14.99,\n",
        "        ),\n",
        "        (\n",
        "            \"Baggy Fit Cargo Shorts\",\n",
        "            \"Baggy-fit cargo shorts in cotton canvas with a generous but not oversized silhouette. Zip fly with button, diagonal side pockets, back pockets with flap and snap fasteners, and bellows leg pockets with snap fasteners.\",\n",
        "            20.99,\n",
        "        ),\n",
        "        (\n",
        "            \"Muslin Shorts\",\n",
        "            \"Shorts in airy cotton muslin. High, ruffle-trimmed waist, covered elastic at waistband, and an extra-narrow drawstring with a bead at ends. Discreet side pockets.\",\n",
        "            15.99,\n",
        "        ),\n",
        "        (\n",
        "            \"Oversized Lyocell-blend Dress\",\n",
        "            \"Short, oversized dress in a woven lyocell blend. Gathered, low-cut V-neck with extra-narrow ties at front, 3/4-length, raglan-cut balloon sleeves with narrow elastic at cuffs, and seams at waist and hips with delicate piping. Unlined.\",\n",
        "            38.99,\n",
        "        ),\n",
        "    )\n",
        "]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "N6cI3toLl3DP",
        "outputId": "e6de913a-0b4d-480e-a736-ac3afda2b568"
      },
      "outputs": [],
      "source": [
        "def build_data():\n",
        "    \"\"\"Yield one bulk index action per entry in `documents`, targeting summer-clothes.\"\"\"\n",
        "    for doc in documents:\n",
        "        yield {\"_index\": \"summer-clothes\", \"_source\": doc}\n",
        "\n",
        "\n",
        "try:\n",
        "    # By default bulk() raises as soon as any document fails, so `errors` would\n",
        "    # always be empty here. raise_on_error=False returns the per-document\n",
        "    # failures instead, which lets the report below actually show them.\n",
        "    success, errors = bulk(_client, build_data(), raise_on_error=False)\n",
        "    print(f\"{success} documents indexed successfully\")\n",
        "    if errors:\n",
        "        print(\"Errors during indexing:\", errors)\n",
        "\n",
        "except Exception as e:\n",
        "    # Still reached for failures that are not per-document (e.g. connection errors).\n",
        "    print(f\"Error: {str(e)}\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "SxI_L3Kev5Tk"
      },
      "source": [
        "## Creating a CrewAI custom tool"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "eI_Mi-J3oFwr"
      },
      "outputs": [],
      "source": [
        "@tool(\"Elasticsearch custom tool\")\n",
        "def elasticsearch_tool(question: str) -> str:\n",
        "    \"\"\"\n",
        "    Search in Elasticsearch using semantic search capabilities.\n",
        "\n",
        "    Args:\n",
        "        question (str): The search query to be semantically matched\n",
        "\n",
        "    Returns:\n",
        "        str: Concatenated hits from Elasticsearch as string JSON\n",
        "    \"\"\"\n",
        "\n",
        "    # Two retrievers over the same question: a lexical match on the title and a\n",
        "    # semantic query on semantic_field, combined by the rrf retriever.\n",
        "    title_retriever = {\"standard\": {\"query\": {\"match\": {\"title\": question}}}}\n",
        "    semantic_retriever = {\n",
        "        \"standard\": {\n",
        "            \"query\": {\"semantic\": {\"field\": \"semantic_field\", \"query\": question}}\n",
        "        }\n",
        "    }\n",
        "    search_body = {\n",
        "        \"size\": 10,\n",
        "        \"_source\": {\"includes\": [\"description\", \"title\", \"price\"]},\n",
        "        \"retriever\": {\"rrf\": {\"retrievers\": [title_retriever, semantic_retriever]}},\n",
        "    }\n",
        "\n",
        "    response = _client.search(index=\"summer-clothes\", body=search_body)\n",
        "    sources = [hit[\"_source\"] for hit in response[\"hits\"][\"hits\"]]\n",
        "\n",
        "    # With no hits the tool returns an empty string, not an empty JSON array.\n",
        "    return json.dumps(sources, indent=2) if sources else \"\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "9boWureEn12E"
      },
      "outputs": [],
      "source": [
        "# Web tools handed to the internet researcher agent below.\n",
        "search_tool, web_rag_tool = SerperDevTool(), WebsiteSearchTool()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "X_0NBD1xv87q"
      },
      "source": [
        "## Setting up agents and tasks"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "KP8H1t-gqpDL"
      },
      "outputs": [],
      "source": [
        "def _verbose_agent(role, goal, backstory, tools):\n",
        "    \"\"\"Build an Agent with verbose=True, the setting every agent in this notebook uses.\"\"\"\n",
        "    return Agent(role=role, goal=goal, backstory=backstory, tools=tools, verbose=True)\n",
        "\n",
        "\n",
        "# Queries the index through the custom Elasticsearch tool.\n",
        "es_retriever_agent = _verbose_agent(\n",
        "    role=\"Retriever\",\n",
        "    goal=\"Retrieve Elasticsearch documents\",\n",
        "    backstory=\"You are an expert researcher\",\n",
        "    tools=[elasticsearch_tool],\n",
        ")\n",
        "\n",
        "# Researches the web with the two web tools.\n",
        "internet_researcher_agent = _verbose_agent(\n",
        "    role=\"Research analyst\",\n",
        "    goal=\"Provide up-to-date market analysis of the industry\",\n",
        "    backstory=\"You are an expert analyst\",\n",
        "    tools=[search_tool, web_rag_tool],\n",
        ")\n",
        "\n",
        "# Writes the final report; it is given no tools.\n",
        "writer_agent = _verbose_agent(\n",
        "    role=\"Content Writer\",\n",
        "    goal=\"Craft engaging blog posts about the information gathered\",\n",
        "    backstory=\"A skilled writer with a passion for writing about fashion\",\n",
        "    tools=[],\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "5j88PRahqrwM"
      },
      "outputs": [],
      "source": [
        "# Retrieval step, handled by the Elasticsearch retriever agent.\n",
        "es_retriever_task = Task(\n",
        "    description=\"Retrieve documents from the Elasticsearch index.\",\n",
        "    expected_output=\"A list of documents retrieved from the Elasticsearch index based on the query.\",\n",
        "    agent=es_retriever_agent,\n",
        ")\n",
        "\n",
        "# Web research step, handled by the internet researcher agent.\n",
        "internet_research_task = Task(\n",
        "    description=\"Research the latest trends in the summer clothes industry and provide a summary.\",\n",
        "    expected_output=\"A summary of the top 5 trending clothes for summer with a unique perspective on their significance.\",\n",
        "    agent=internet_researcher_agent,\n",
        ")\n",
        "\n",
        "# Writing step. It has to compare both upstream results, so both tasks are\n",
        "# passed as context explicitly rather than relying on implicit context passing.\n",
        "write_task = Task(\n",
        "    description=\"Compare and contrast the information provided from the Retriever agent and the research agent, use cites. Be short\",\n",
        "    expected_output=\"Short report about Elasticsearch retriever and researcher.\",\n",
        "    agent=writer_agent,\n",
        "    context=[es_retriever_task, internet_research_task],\n",
        "    output_file=\"blog-posts/new_post.md\",  # The final blog post will be saved here\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "bfOm8wdvwF7N"
      },
      "source": [
        "## Executing the tasks"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "LXqw4o62qyFN",
        "outputId": "549a6001-beaf-47ad-d430-ef57813c6a6b"
      },
      "outputs": [],
      "source": [
        "# Bundle the three agents and their tasks into a single crew.\n",
        "crew = Crew(\n",
        "    tasks=[es_retriever_task, internet_research_task, write_task],\n",
        "    agents=[es_retriever_agent, internet_researcher_agent, writer_agent],\n",
        ")\n",
        "\n",
        "# Run the crew; the result of kickoff() is shown as the cell output.\n",
        "crew.kickoff()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "S6WZMJayyzxh"
      },
      "source": [
        "## Deleting\n",
        "\n",
        "Delete the index and the inference endpoint created in this notebook so they stop consuming cluster resources."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "9UcQwa41yy_x",
        "outputId": "8c8337e1-8685-4c33-ae51-ee61e323f0cd"
      },
      "outputs": [],
      "source": [
        "# Ignore 400/404 responses so this cell can be re-run after the resources are gone.\n",
        "# Transport options go through .options(); passing `ignore=` directly to an API\n",
        "# method is deprecated in the 8.x client.\n",
        "_cleanup_client = _client.options(ignore_status=[400, 404])\n",
        "\n",
        "# Cleanup - Delete Index\n",
        "_cleanup_client.indices.delete(index=\"summer-clothes\")\n",
        "\n",
        "# Cleanup - Inference endpoint\n",
        "_cleanup_client.inference.delete(inference_id=\"clothes-inference\")"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "3.11.11",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.11.11"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
